import pandas as pd
import japanese_address
import re

class PrefectureData:
    """Lookup tables and name helpers for Japanese prefectures."""

    # Matches a single trailing 都 / 府 / 県 character. Compiled once so that
    # repeated calls to strip_ken_fu_to do not rebuild the pattern.
    _TRAILING_SUFFIX = re.compile(r"[都府県]$")

    def __init__(self):
        # Two-digit prefecture code (as a string) -> romanised prefecture name.
        # These look like the JIS X 0401 prefecture codes -- TODO confirm.
        # NOTE: "19" (Yamanashi) is listed between "14" and "15", so the
        # iteration order of this dict is not strictly numeric.
        self.pref_code_map = {
            "01": "Hokkaido",
            "02": "Aomori",
            "03": "Iwate",
            "04": "Miyagi",
            "05": "Akita",
            "06": "Yamagata",
            "07": "Fukushima",
            "08": "Ibaraki",
            "09": "Tochigi",
            "10": "Gunma",
            "11": "Saitama",
            "12": "Chiba",
            "13": "Tokyo",
            "14": "Kanagawa",
            "19": "Yamanashi",
            "15": "Niigata",
            "16": "Toyama",
            "17": "Ishikawa",
            "18": "Fukui",
            "20": "Nagano",
            "21": "Gifu",
            "22": "Shizuoka",
            "23": "Aichi",
            "24": "Mie",
            "25": "Shiga",
            "26": "Kyoto",
            "27": "Osaka",
            "28": "Hyogo",
            "29": "Nara",
            "30": "Wakayama",
            "31": "Tottori",
            "32": "Shimane",
            "33": "Okayama",
            "34": "Hiroshima",
            "35": "Yamaguchi",
            "36": "Tokushima",
            "37": "Kagawa",
            "38": "Ehime",
            "39": "Kochi",
            "40": "Fukuoka",
            "41": "Saga",
            "42": "Nagasaki",
            "43": "Kumamoto",
            "44": "Oita",
            "45": "Miyazaki",
            "46": "Kagoshima",
            "47": "Okinawa",
        }

    @staticmethod
    def make_english_jp_map():
        """Build a mapping from English prefecture names to Japanese names.

        The mapping is ``japanese_address.JAPANESE_PREFECTURES`` with keys and
        values swapped, after which four entries are set explicitly.

        Returns:
            dict: the inverted mapping including the four explicit entries.
        """
        mapper = {
            value: key
            for key, value in japanese_address.JAPANESE_PREFECTURES.items()
        }
        # Explicit entries for four plain-ASCII spellings; presumably the
        # library spells these names differently -- TODO confirm.
        mapper['Hyogo'] = '兵庫県'
        mapper['Kochi'] = '高知県'
        mapper['Oita'] = '大分県'
        mapper['Miyazaki'] = '宮崎県'

        return mapper

    @staticmethod
    def strip_ken_fu_to(pref_name_list):
        """Remove the trailing 都 / 府 / 県 suffix from each prefecture name.

        Only one suffix character at the very end of a name is removed, so
        characters inside a name are never touched (京都府 becomes 京都).
        Names without such a suffix, e.g. 北海道, pass through unchanged.
        The bare name 京都 is also returned unchanged, because its final 都
        belongs to the name itself; this makes the function safe to apply to
        names that were already stripped.

        Args:
            pref_name_list: iterable of prefecture name strings.

        Returns:
            list: the names with their suffix removed, in input order.
        """
        suffix = PrefectureData._TRAILING_SUFFIX
        return [
            pref if pref == "京都" else suffix.sub("", pref)
            for pref in pref_name_list
        ]

    @staticmethod
    def make_pref_ken_noken_mapper():
        """Map each full prefecture name to the same name without its suffix.

        The full names are the keys of
        ``japanese_address.JAPANESE_PREFECTURES``.

        Returns:
            dict: full name -> name as returned by ``strip_ken_fu_to``.
        """
        # Iterating the mapping yields its keys; materialise them once so the
        # same sequence feeds both sides of the zip.
        pref_names = list(japanese_address.JAPANESE_PREFECTURES)
        pref_names_no_tofuken = PrefectureData.strip_ken_fu_to(pref_names)

        return dict(zip(pref_names, pref_names_no_tofuken))


def find_elite(unique_cand_ds,
               elite_columns=('rs_bcrat', 'rs_med', 'rs_law', 'rs_seshu')):
    """Flag the rows in which at least one of the given columns equals 1.

    Args:
        unique_cand_ds: DataFrame containing every column named in
            ``elite_columns``.
        elite_columns: column labels to inspect. Defaults to the four
            ``rs_*`` columns this function has always checked.

    Returns:
        pandas.Series: boolean Series on the index of ``unique_cand_ds``;
        True where any inspected column equals 1. Missing values compare
        unequal to 1 and therefore never set the flag.
    """
    is_one = unique_cand_ds.loc[:, list(elite_columns)] == 1
    # Vectorised row-wise reduction; also yields a bool Series for a frame
    # with zero rows.
    return is_one.any(axis=1)


def _share_flagged(frame, column):
    """Return the fraction of non-missing values in ``frame[column]`` equal to 1/True."""
    flags = frame[column].dropna()
    if flags.empty:
        # No usable rows: the share is undefined.
        return float('nan')
    return float((flags == 1).sum() / len(flags))


def find_all_in_pref_cands(subset_of_unique_cand):
    """Compute the share of flagged rows for three indicator columns.

    The share is computed directly from each column, so it is well defined
    when every row (or no row) is flagged, and it does not depend on missing
    values in any other column of the frame.

    Args:
        subset_of_unique_cand: DataFrame with the columns ``rs_borninken``,
            ``uni_in_pref`` and ``all_in_pref``, each holding 0/1 or
            False/True values.

    Returns:
        tuple: ``(born_in_ken, uni_in_pref, all_in_pref)`` shares as floats
        between 0 and 1. A share is NaN when its column has no non-missing
        values (for example, an empty frame).
    """
    percentage_born_in_ken = _share_flagged(subset_of_unique_cand, 'rs_borninken')
    percentage_uni_in_pref = _share_flagged(subset_of_unique_cand, 'uni_in_pref')
    percentage_all_in_pref = _share_flagged(subset_of_unique_cand, 'all_in_pref')

    return percentage_born_in_ken, percentage_uni_in_pref, percentage_all_in_pref

# Load the candidate table and add the derived per-candidate columns.
unique_cand = pd.read_csv("data/candidate_dataset.csv")
unique_cand.loc[:, 'elite'] = find_elite(unique_cand)
en_jp_pref_map = PrefectureData.make_english_jp_map()

# Translate each candidate's prefecture name to Japanese, then drop the
# 都/府/県 suffix. An unknown prefecture name raises KeyError here.
jp_cand_pref = [en_jp_pref_map[x] for x in unique_cand.prefecture_name]
jp_cand_pref = PrefectureData.strip_ken_fu_to(jp_cand_pref)

unique_cand.loc[:, 'prefecture_jp'] = jp_cand_pref

# uni_in_pref: the stripped prefecture name equals the uni_prefecture value.
# all_in_pref: additionally requires rs_borninken (presumably a 0/1
# indicator -- TODO confirm against the dataset).
uni_in_pref = (unique_cand.prefecture_jp == unique_cand.uni_prefecture)
all_in_pref = (uni_in_pref & unique_cand.rs_borninken)

unique_cand.loc[:, 'all_in_pref'] = all_in_pref
unique_cand.loc[:, 'uni_in_pref'] = uni_in_pref

# Candidate subsets to compare.
elite_cand = unique_cand.loc[unique_cand.elite, :]
jimin_cand = unique_cand.loc[unique_cand.party == '自民', :]
kyosan_cand = unique_cand.loc[unique_cand.party == '共産', :]
minshu_cand = unique_cand.loc[unique_cand.party == '民主', :]
# NOTE: shutoken_cand is built here but is not part of data_subsets below.
shutoken_cand = unique_cand.loc[unique_cand.prefecture_jp.isin(['東京', '神奈川', '埼玉', '千葉']), :]
tokyo_cand = unique_cand.loc[unique_cand.prefecture_jp == '東京', :]
bureaucrats_cand = unique_cand.loc[unique_cand.rs_bcrat == 1, :]
local_pol_cand = unique_cand.loc[unique_cand.rs_assy == 1, :]
seshu_cand = unique_cand.loc[unique_cand.rs_seshu == 1, :]

# data_subsets and cand_subsets are parallel lists: same length, same order.
data_subsets = [
    unique_cand,
    elite_cand,
    bureaucrats_cand,
    seshu_cand,
    local_pol_cand,
    jimin_cand,
    kyosan_cand,
    minshu_cand,
    tokyo_cand
]

cand_subsets = [
    'All', 'Elite', 'Bureaucrat', 'Dynastic Candidate', 'Local Politician', 'LDP', 'JCP', 'DPJ', 'Tokyo'
]

# One row of three shares per subset.
share_columns = ['Born in Prefecture', 'University in Prefecture', 'Both']

candidate_data = [find_all_in_pref_cands(subset) for subset in data_subsets]
candidate_data_formatted = pd.DataFrame(candidate_data, columns=share_columns)

candidate_data_formatted.loc[:, 'Candidate Type'] = cand_subsets

# Put the label column first.
candidate_data_formatted = candidate_data_formatted.loc[:, ['Candidate Type'] + share_columns]
candidate_data_formatted = candidate_data_formatted.round(2)

percentage_form = candidate_data_formatted.loc[:, share_columns] * 100
# Round before the integer cast: 0.29 * 100 is 28.999999999999996 in binary
# floating point, and a bare astype(int) would truncate it to 28.
percentage_sign = percentage_form.round().astype(int).astype(str) + '%'

# Replace the float columns with their string form by label, rather than
# writing strings into float columns in place.
candidate_data_formatted[share_columns] = percentage_sign
# Keep every row except positions 1 ('Elite') and 6 ('JCP').
candidate_data_formatted = candidate_data_formatted.iloc[[0, 2, 3, 4, 5, 7, 8]]

candidate_data_formatted.to_csv('data/local_candidates.csv', index=False)

